In [1]:
import os

## Set directory
## NOTE(review): hardcoded absolute cluster path -- this notebook only runs on
## the DCC filesystem; consider a configurable DATA_DIR
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')

import networkx as nx
## NOTE(review): wildcard import -- torch, sc, pd, np, plt, pickle,
## Root_Dataset, DataLoader, ClassifierLSTM used below presumably come from
## genesys_evaluate_v1; explicit imports would make the dependencies visible
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress deprecation warnings only (other warning categories still show)
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [2]:
## Conda Env pytorch-gpu on DCC
## Record library versions for reproducibility; `torch` and `sc` (presumably
## scanpy -- confirm) are brought in by the wildcard import above
print(torch.__version__)
print(sc.__version__) 
1.13.0.post200
1.9.1
In [3]:
## Genes considered/used (shared among samples) 
## One row per gene; the 'features' column holds gene IDs in matrix order
gene_list = pd.read_csv('./gene_list_1108.csv')

Load Data¶

In [4]:
## Load the pre-processed root dataset (train/test expression matrices + labels).
## NOTE: pickle.load executes arbitrary code -- only load trusted files.
with open("./genesys_root_data.pkl", 'rb') as file_handle:
    data = pickle.load(file_handle)
    
batch_size = 2000
## Test-set loader. drop_last=True keeps every batch exactly `batch_size`
## cells, which model.init_hidden(batch_size) in the GRN loop relies on.
dataset = Root_Dataset(data['X_test'], data['y_test'])
loader = DataLoader(dataset,
                         batch_size = batch_size,
                         shuffle = True, drop_last=True)
In [5]:
## Model hyper-parameters -- must match the checkpoint loaded below
## (the printed model summary shows input_size = 17513 genes)
input_size = data['X_train'].shape[1]
## 10 cell types 
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
device = "cpu"
path = "./"

Load trained GeneSys model¶

In [6]:
## Instantiate the classifier and load the trained checkpoint.
## map_location follows the `device` variable so this cell keeps working if
## the notebook is later switched off CPU; the redundant `model = model`
## no-op from the original cell was removed.
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"best_ALL_1130_continue.pth", map_location=torch.device(device)))
model.eval()
Out[6]:
ClassifierLSTM(
  (fc1): Sequential(
    (0): Linear(in_features=17513, out_features=256, bias=True)
    (1): Dropout(p=0.2, inplace=False)
    (2): GaussianNoise()
  )
  (fc): Sequential(
    (0): ReLU()
    (1): Linear(in_features=512, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=10, bias=True)
  )
  (lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (b_to_z): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (bz2_infer_z1): DBlock(
    (fc1): Linear(in_features=1024, out_features=256, bias=True)
    (fc2): Linear(in_features=1024, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z1_to_z2): DBlock(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=512, out_features=256, bias=True)
    (fc_mu): Linear(in_features=256, out_features=512, bias=True)
    (fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
  )
  (z_to_x): Decoder(
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=256, out_features=256, bias=True)
    (fc3): Linear(in_features=256, out_features=17513, bias=True)
  )
)
In [7]:
## Cell-type labels in the order the model's output head uses, plus
## forward/backward lookup tables between label string and class index.
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = dict(zip(classes, range(len(classes))))
num2class = dict(enumerate(classes))
In [8]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
## Per-cell-type GRN weight matrices (gene x gene).
## NOTE(review): 10 x 17513 x 17513 float64 is ~24.5 GB -- HPC-sized.  17513
## duplicates `input_size` from the config cell; keep the two in sync.
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
In [9]:
## GRN for the transition t0 to t1: for each cell type, regress the generated
## t1 expression onto the generated t0 expression, giving a gene x gene weight
## matrix per batch, then average over batches.
for ct in cts:
    print(ct)
    ## One (genes x genes) least-squares solution per test-loader batch
    cws = np.zeros((len(loader), 17513, 17513))
    with torch.no_grad():
        for i, sample in enumerate(loader):
            x = sample['x'].to(device)
            y = sample['y'].to(device)
            y_label = [num2class[i] for i in y.tolist()]
            
            ## generate_current / generate_next are model methods -- presumably
            ## decode expression at the current (t0) and next (t1) timestep;
            ## confirm against the model source.  batch_size here must equal
            ## the loader's batch size (both 2000, enforced by drop_last=True).
            pred_h = model.init_hidden(batch_size)
            tfrom = model.generate_current(x, pred_h, 0).to('cpu').detach().numpy()
            cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
            
            pred_h = model.init_hidden(batch_size)
            tto = model.generate_next(x, pred_h, 0).to('cpu').detach().numpy()   
            cto = tto[np.where(np.array(y_label)==ct)[0],:]
            
            ## Solve cfrom @ cw ~= cto in the least-squares sense
            ## (cells x genes on both sides, so cw is genes x genes)
            cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
            cws[i] = cw
    
    ## Calculate mean across number of repeats
    cwm = np.mean(cws, axis=0)
    ctw[cts.index(ct)] = cwm
Atrichoblast
Trichoblast
Cortex
Endodermis
Pericycle
Procambium
Xylem
Phloem
Lateral Root Cap
Columella
In [10]:
# Save the array to disk (~24.5 GB; written next to the notebook)
np.save('genesys_ctw_t0-t1.npy', ctw)
In [11]:
## Reload the saved matrices -- lets the analysis below restart from here
ctw = np.load('genesys_ctw_t0-t1.npy')
In [12]:
## Standardise each cell type's GRN matrix to z-scores
## (subtract its own mean, divide by its own standard deviation)
ctw_z = np.zeros((len(cts), 17513, 17513))
for ct_idx in range(len(cts)):
    weights = ctw[ct_idx]
    ctw_z[ct_idx] = (weights - weights.mean()) / weights.std()
In [13]:
## Filtering based on z-scores (binary mask, no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold: keep entries with |z| > threshold.  Note this is
## TWO-sided -- both strongly positive and strongly negative weights survive.
threshold=3
for i in range(len(cts)):
    ## boolean mask stored as 0.0/1.0 in a float array
    ctw_f[i] = np.abs(ctw_z[i]) > threshold

Load TFs list¶

In [14]:
## Curated transcription-factor list; columns include 'GeneID' and 'Name'
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [15]:
## Make TF names unique and assign preferred names.
## The original used chained indexing (wanted_TFs['Name'][mask] = ...), which
## raises SettingWithCopyWarning and is not guaranteed to write back to the
## frame (it silently stops working under pandas copy-on-write).  Use a single
## .loc assignment driven by a GeneID -> preferred-name mapping instead.
preferred_names = {
    "AT2G33880": "WOX9",
    "AT2G45160": "SCL27",
    "AT5G04410": "NAC78",
    "AT3G29035": "ORS1",
    "AT2G02540": "ZHD3",
    "AT3G16500": "IAA26",
    "AT5G09740": "HAG5",
    "AT4G24660": "ZHD2",
    "AT5G46880": "HDG5",
    "AT1G28420": "RLT1",
    "AT1G14580": "BLJ",
    "AT3G45260": "BIB",
    "AT2G02070": "RVN",
    "AT2G28160": "FIT",
    "AT1G68360": "GIS3",
    "AT1G20640": "NLP4",
    "AT5G05550": "VFP5",
    "AT3G59470": "FRF1",
    "AT5G15150": "HAT7",
    "AT5G14750": "WER",
    "AT1G75710": "BRON",
    "AT1G74500": "TMO7",
    "AT2G12646": "RITF1",
    "AT3G48100": "ARR5",
    "AT4G16141": "GATA17L",
    "AT5G65640": "NFL",
    "AT1G62700": "VND5",
    "AT4G36160": "VND2",
    "AT5G66300": "VND3",
    "AT1G12260": "VND4",
    "AT5G62380": "VND6",
}
mask = wanted_TFs['GeneID'].isin(preferred_names)
wanted_TFs.loc[mask, 'Name'] = wanted_TFs.loc[mask, 'GeneID'].map(preferred_names)
In [16]:
## Sanity check: after the renames every TF name should occur exactly once
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[16]:
NAC001    1
PRE5      1
MYB118    1
MYB21     1
MYB0      1
Name: Name, dtype: int64

Network analysis¶

In [17]:
## Positional indices (into gene_list) of the TFs present in the expression
## matrix.  The feature list is built once outside the loop -- the original
## called gene_list['features'].tolist() on every iteration, which made this
## accidentally quadratic.
features = gene_list['features'].tolist()
TFidx = []
for gene_id in wanted_TFs['GeneID']:
    if gene_id in features:
        ## list.index gives the first match, same as np.where(...)[0][0]
        TFidx.append(features.index(gene_id))

TFidx = np.sort(np.array(TFidx))
In [18]:
def network(i):
    """Build, plot and summarise the TF-TF GRN for cell type index ``i``.

    ``i`` indexes into the module-level ``cts`` ordering (the same order used
    to fill ``ctw``/``ctw_f``).  Relies on the globals ``ctw``, ``ctw_f``,
    ``TFidx``, ``gene_list`` and ``wanted_TFs`` defined in earlier cells.

    Returns a DataFrame (rows = TF names) with degree / out / in /
    betweenness / closeness / eigenvector centralities, sorted by
    betweenness centrality descending.
    """
    ## No weights (binary adjacency mask for this cell type)
    adj_nw = ctw_f[i]
    ## Weighted: signed regression weights kept only where the mask is 1
    adj = ctw[i]*ctw_f[i]
    ## TF only
    adj = adj[np.ix_(TFidx,TFidx)]
    adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
    
    ## Remove no connect: rows (regulators) / columns (targets) with >= 1 edge
    regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
    taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
    ## Reciprocal: keep TFs that act as BOTH regulator and target
    keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
    #keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
    
    TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
    ## TF name to keep
    ## NOTE(review): this loop reuses the name `i`, clobbering the function
    ## argument; harmless only because `i` is reassigned before every later use
    TFname = []
    for i in np.array(gene_list['features'][TFidx])[keepidx]:
        TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
        
    adj = adj[np.ix_(keepidx,keepidx)]
    
    # Create a NetworkX graph for non-directed edges
    G = nx.Graph()  # undirected, at most one edge per node pair (contrast the MultiDiGraph below)
    
    # Add nodes to the graph
    num_nodes = adj.shape[0]
    for i, name in enumerate(TFname):
        G.add_node(i, name=name)
    
    # Add edges to the graph with weights
    # (weight = |w|; distance = 1/|w| so stronger interactions are "shorter")
    for i in range(num_nodes):
        for j in range(num_nodes):
            weight = adj[i, j]
            if weight != 0:
                G.add_edge(i, j, weight=abs(weight), distance=1/abs(weight))
            

    ## How close a node is to all other nodes via shortest (1/|w|-weighted) paths
    closeness_centrality = nx.closeness_centrality(G, distance='distance')
    ## How strongly a node is connected to other well-connected nodes
    eigenvector_centrality = nx.eigenvector_centrality(G)
    
    # Create a NetworkX graph for directed edges
    G = nx.MultiDiGraph()  # supports directed edges and allows for multiple edges between the same pair of nodes
    
    # Add nodes to the graph
    num_nodes = adj.shape[0]
    for i, name in enumerate(TFname):
        G.add_node(i, name=name)
    
    # Add edges to the graph with SIGNED weights (sign distinguishes
    # activating from repressing interactions)
    for i in range(num_nodes):
        for j in range(num_nodes):
            weight = adj[i, j]
            if weight != 0:
                G.add_edge(i, j, weight=weight)
    
    ## Number of connections (edges) each node has
    degree_centrality = nx.degree_centrality(G)
    # Calculate outgoing centrality
    out_centrality = nx.out_degree_centrality(G)
    # Calculate incoming centrality
    in_centrality = nx.in_degree_centrality(G)
    ## How often a node lies on shortest paths between other nodes.
    ## NOTE(review): `weight` here holds SIGNED weights, which NetworkX treats
    ## as path lengths; negative values can break shortest-path computation --
    ## confirm this is intended (the undirected graph above used |w| instead)
    betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
    
    ## Non_Reciprocal Out centrality
    # Visualize the graph
    pos = nx.spring_layout(G)  # Positions of the nodes
    
    # Node colors based on out-degree centrality
    node_colors = [out_centrality[node] for node in G.nodes()]
    
    # Node sizes based on out-degree centrality
    node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]

    # Get the edge weights as a dictionary
    edge_weights = nx.get_edge_attributes(G, 'weight')
    # Red = positive (activating) weight, blue = negative (repressing)
    edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
    
    # Scale the edge weights to desired linewidths
    # NOTE(review): max() is over signed weights, so negative-weight edges get
    # negative widths -- confirm weights are predominantly positive here
    max_weight = max(edge_weights.values())
    edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
    
    # Draw the graph
    nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
    # Add node labels
    labels = {node: G.nodes[node]['name'] for node in G.nodes}
    nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
    
    # Add a colorbar to show the out-degree centrality color mapping
    # NOTE(review): matplotlib >= 3.6 requires an explicit axes for a colorbar
    # built from a standalone ScalarMappable -- may need plt.colorbar(sm, ax=plt.gca())
    sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
    sm.set_array([])
    plt.colorbar(sm)
    
    # Show the plot
    plt.show()
    
    # Assemble the six centrality measures into one table indexed by TF name
    dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
    oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
    ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
    bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
    cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
    ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
    df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
    df.index =TFname
    df = df.sort_values('betweenness_centrality', ascending=False)
    
    return(df)
In [19]:
## Build the per-cell-type networks (argument indexes into `cts`)
atri = network(0)
In [20]:
atri
Out[20]:
degree_centrality out_centrality in_centrality betweenness_centrality closeness_centrality eigenvector_centrality
HDA3 0.858357 0.056657 0.801700 0.993578 0.002648 0.118494
BRON 0.742210 0.169972 0.572238 0.991839 0.002406 0.112570
AT2G35605 1.025496 0.280453 0.745042 0.988355 0.002503 0.124640
NAC094 0.770538 0.283286 0.487252 0.987437 0.002262 0.109741
SMB 0.733711 0.201133 0.532578 0.986005 0.002383 0.107298
... ... ... ... ... ... ...
CCA1 0.011331 0.002833 0.008499 0.000000 0.000914 0.004676
IDD2 0.062323 0.036827 0.025496 0.000000 0.001067 0.019845
AT2G36930 0.175637 0.031161 0.144476 0.000000 0.001792 0.043830
NAC084 0.110482 0.093484 0.016997 0.000000 0.001513 0.030022
BPC5 0.011331 0.005666 0.005666 0.000000 0.000809 0.003438

354 rows × 6 columns

In [21]:
## Remaining cell-type networks, in `cts` order
tri = network(1)
In [22]:
cor = network(2)
In [23]:
end = network(3)
In [24]:
per = network(4)
In [25]:
pro = network(5)
In [26]:
xyl = network(6)
In [27]:
phl = network(7)
In [28]:
lrc = network(8)
In [29]:
col = network(9)
In [30]:
## Prefix each cell type's centrality columns with its abbreviation so the
## ten tables can be concatenated side by side below.
centrality_metrics = ['degree_centrality', 'out_centrality', 'in_centrality',
                      'betweenness_centrality', 'closeness_centrality', 'eigenvector_centrality']
for prefix, table in [('atri', atri), ('tri', tri), ('cor', cor), ('end', end), ('per', per),
                      ('pro', pro), ('xyl', xyl), ('phl', phl), ('lrc', lrc), ('col', col)]:
    table.columns = [prefix + '_' + metric for metric in centrality_metrics]
In [31]:
## Identify the main regulators in each network: a TF counts as a regulator
## for a cell type when its betweenness centrality there is positive.
## tf_occurance = number of networks in which the TF is a regulator.
celltype_tables = [('atri', atri), ('tri', tri), ('lrc', lrc), ('cor', cor), ('end', end),
                   ('per', per), ('pro', pro), ('xyl', xyl), ('phl', phl), ('col', col)]
tff = []
for prefix, table in celltype_tables:
    tff += table[table[prefix + '_betweenness_centrality'] > 0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts(), columns=['tf_occurance'])
tf_spec = pd.concat([tf_occurance] + [table for _, table in celltype_tables], axis=1)
tf_spec = tf_spec.fillna(0)
In [32]:
## Epidermis (atri, tri, lrc): TFs that regulate in exactly 3 networks and
## have all nine epidermal centralities positive
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): centrality_sum is computed AFTER centrality_count was added,
## so it includes the count itself (hence values >= 9 below); ordering within
## a fixed count is unaffected
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[32]:
atri_betweenness_centrality tri_betweenness_centrality lrc_betweenness_centrality atri_out_centrality tri_out_centrality lrc_out_centrality atri_in_centrality tri_in_centrality lrc_in_centrality centrality_count centrality_sum
WRKY27 0.161690 0.114025 0.007362 0.150142 0.164875 0.084746 0.053824 0.150538 0.072639 9 9.959841
AT2G36026 0.005127 0.008690 0.011545 0.189802 0.157706 0.107748 0.062323 0.129032 0.150121 9 9.822093
In [33]:
## atri, tri: TFs regulating in exactly 2 networks with all six centralities positive
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): centrality_sum includes the centrality_count column itself
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[33]:
atri_betweenness_centrality tri_betweenness_centrality atri_out_centrality tri_out_centrality atri_in_centrality tri_in_centrality centrality_count centrality_sum
GL2 0.728424 0.966736 0.201133 0.336918 0.294618 0.387097 6 8.914925
RHD6 0.000008 0.885047 0.127479 0.473118 0.039660 0.164875 6 7.690186
TTG2 0.168604 0.645548 0.059490 0.086022 0.084986 0.200717 6 7.245366
In [34]:
## Atrichoblast specific: regulator in exactly one network, all three
## atri centralities positive; ranked by betweenness
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
## NOTE(review): centrality_sum includes the centrality_count column itself
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[34]:
atri_betweenness_centrality atri_out_centrality atri_in_centrality centrality_count centrality_sum
IDD7 0.181424 0.444759 0.087819 3 3.714002
GRF3 0.005191 0.087819 0.031161 3 3.124171
HTA2 0.001602 0.161473 0.076487 3 3.239562
In [35]:
## Trichoblast specific (same filter as the atrichoblast cell above)
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[35]:
tri_betweenness_centrality tri_out_centrality tri_in_centrality centrality_count centrality_sum
AT3G53370 0.984168 0.096774 0.215054 3 4.295995
WRKY9 0.982749 0.189964 0.394265 3 4.566979
IAA14 0.861388 0.057348 0.075269 3 3.994005
AT1G61990 0.591076 0.215054 0.071685 3 3.877814
WRKY72 0.302571 0.451613 0.132616 3 3.886800
AT1G03650 0.099533 0.129032 0.075269 3 3.303834
GL3 0.011372 0.344086 0.060932 3 3.416389
AT5G06110 0.008883 0.039427 0.057348 3 3.105657
MYB47 0.003197 0.014337 0.021505 3 3.039040
MBD6 0.000026 0.236559 0.082437 3 3.319022
In [36]:
## LRC specific (same filter as the atrichoblast cell above)
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[36]:
lrc_betweenness_centrality lrc_out_centrality lrc_in_centrality centrality_count centrality_sum
MYB3R-4 0.393633 0.127119 0.015738 3 3.536490
AT1G02670 0.009110 0.037530 0.035109 3 3.081749
AT1G76580 0.003632 0.025424 0.127119 3 3.156174
WRKY65 0.001419 0.013317 0.024213 3 3.038949
AT3G19360 0.001360 0.141646 0.029056 3 3.172063
AT2G34450 0.001224 0.035109 0.004843 3 3.041175
TFIIIA 0.001211 0.001211 0.065375 3 3.067797
ATE2F2 0.001208 0.018160 0.004843 3 3.024210
AT3G48600 0.001140 0.012107 0.033898 3 3.047145
AT5G41020 0.000544 0.042373 0.023002 3 3.065920
AT1G72030 0.000186 0.052058 0.029056 3 3.081300
AT5G19490 0.000170 0.066586 0.018160 3 3.084916
MBD5 0.000144 0.055690 0.007264 3 3.063098
TRP1 0.000054 0.076271 0.019370 3 3.095696
BRM 0.000003 0.039952 0.069007 3 3.108962
In [37]:
## Columella specific (same filter as the atrichoblast cell above)
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[37]:
col_betweenness_centrality col_out_centrality col_in_centrality centrality_count centrality_sum
AT1G70000 0.403795 0.087515 0.075846 3 3.567156
HMG 0.343436 0.105018 0.071179 3 3.519632
AT3G02890 0.154368 0.026838 0.179697 3 3.360902
ARF6 0.126919 0.381564 0.302217 3 3.810700
HAM3 0.091571 0.017503 0.017503 3 3.126577
MYB14 0.021393 0.016336 0.028005 3 3.065734
TTG1 0.018636 0.026838 0.012835 3 3.058309
ERF3 0.018472 0.045508 0.095683 3 3.159662
IDD1 0.016494 0.091015 0.161027 3 3.268536
NTT 0.010256 0.275379 0.192532 3 3.478168
AT3G16280 0.003157 0.221704 0.032672 3 3.257533
AT2G47890 0.002889 0.019837 0.026838 3 3.049563
ALY2 0.001772 0.037340 0.101517 3 3.140629
WRKY7 0.001519 0.037340 0.026838 3 3.065696
AT1G05120 0.001167 0.074679 0.060677 3 3.136523
LOL2 0.001165 0.004667 0.011669 3 3.017502
AT2G47090 0.001080 0.010502 0.024504 3 3.036085
ZFP6 0.001005 0.299883 0.116686 3 3.417574
MBD7 0.000568 0.136523 0.031505 3 3.168596
AT4G27900 0.000484 0.154026 0.053676 3 3.208185
AT1G76350 0.000044 0.166861 0.031505 3 3.198410
AT3G20010 0.000012 0.030338 0.079347 3 3.109697
E2F1 0.000004 0.025671 0.051342 3 3.077017
ARR6 0.000004 0.145858 0.015169 3 3.161031
VRN1 0.000003 0.024504 0.022170 3 3.046677
In [38]:
## Ground tissue (cor, end): TFs regulating in exactly 2 networks with all six
## centralities positive
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): centrality_sum includes the centrality_count column itself
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[38]:
cor_betweenness_centrality end_betweenness_centrality cor_out_centrality end_out_centrality cor_in_centrality end_in_centrality centrality_count centrality_sum
SCR 0.837784 0.665611 0.199219 0.156171 0.125000 0.158690 6 8.142475
MYB36 0.859631 0.443688 0.083984 0.171285 0.171875 0.234257 6 7.964720
MYB68 0.837684 0.401083 0.093750 0.166247 0.125000 0.171285 6 7.795048
3xHMG-box1 0.104035 0.000706 0.164062 0.251889 0.109375 0.108312 6 6.738381
PPD1 0.033822 0.014159 0.035156 0.206549 0.111328 0.130982 6 6.531997
AT1G16640 0.001953 0.023720 0.117188 0.123426 0.115234 0.133501 6 6.515022
MYB34 0.021679 0.002926 0.111328 0.138539 0.017578 0.020151 6 6.312202
In [39]:
## Cortex specific (same filter as the atrichoblast cell above)
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[39]:
cor_betweenness_centrality cor_out_centrality cor_in_centrality centrality_count centrality_sum
RVN 0.457857 0.037109 0.070312 3 3.565279
AT2G18850 0.244313 0.105469 0.035156 3 3.384938
AT3G55080 0.113897 0.136719 0.101562 3 3.352178
AT5G58280 0.032649 0.103516 0.050781 3 3.186946
ELO3 0.003042 0.044922 0.029297 3 3.077261
AT3G04850 0.000837 0.117188 0.019531 3 3.137556
In [40]:
## Endodermis specific (same filter as the atrichoblast cell above)
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[40]:
end_betweenness_centrality end_out_centrality end_in_centrality centrality_count centrality_sum
NF-YB11 0.139722 0.010076 0.057935 3 3.207732
AT5G23930 0.000013 0.267003 0.078086 3 3.345101
In [41]:
## Stele (per, pro, xyl, phl): TFs regulating in exactly 4 networks with all
## twelve stele centralities positive
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
## NOTE(review): centrality_sum includes the centrality_count column itself
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[41]:
per_betweenness_centrality pro_betweenness_centrality xyl_betweenness_centrality phl_betweenness_centrality per_out_centrality pro_out_centrality xyl_out_centrality phl_out_centrality per_in_centrality pro_in_centrality xyl_in_centrality phl_in_centrality centrality_count centrality_sum
AT3G43430 0.878139 0.889922 0.386851 0.340981 0.120192 0.752967 0.124764 0.919836 0.218750 0.771305 0.279773 0.924974 12 18.608454
BZIP61 0.101489 0.748565 0.773121 0.990170 0.153846 0.686084 0.349716 0.345324 0.194712 0.193096 0.332703 0.343268 12 17.212094
MYB20 0.828545 0.067231 0.051183 0.974602 0.038462 0.428263 0.289225 0.217883 0.266827 0.482201 0.260870 0.264132 12 16.169422
AT5G51780 0.781267 0.003206 0.487022 0.782528 0.170673 0.240561 0.395085 0.204522 0.173077 0.159655 0.285444 0.168551 12 15.851592
HB40 0.000962 0.043132 0.877442 0.100809 0.233173 0.221143 0.413989 0.271326 0.093750 0.073355 0.527410 0.367934 12 15.224425
NF-YB2 0.027375 0.317524 0.594242 0.045043 0.471154 0.238403 0.185255 0.205550 0.199519 0.140237 0.482042 0.188078 12 15.094423
AT3G06590 0.013102 0.717531 0.001113 0.008457 0.096154 0.271845 0.128544 0.173690 0.240385 0.187702 0.069943 0.189106 12 14.097572
IDD14 0.002218 0.565865 0.012108 0.001116 0.084135 0.154261 0.149338 0.154162 0.158654 0.186624 0.079395 0.187050 12 13.734927
AT1G64620 0.043646 0.001445 0.249234 0.000088 0.122596 0.097087 0.342155 0.038027 0.146635 0.058252 0.264650 0.058582 12 13.422396
bZIP4 0.016039 0.104889 0.001099 0.002131 0.228365 0.180151 0.102079 0.127441 0.139423 0.045307 0.043478 0.093525 12 13.083929
In [42]:
## Pericycle specific (same filter as the atrichoblast cell above)
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[42]:
per_betweenness_centrality per_out_centrality per_in_centrality centrality_count centrality_sum
LBD39 0.002549 0.038462 0.060096 3 3.101106
ERF10 0.000023 0.115385 0.069712 3 3.185119
In [43]:
## Procambium specific (same filter as the atrichoblast cell above)
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[43]:
pro_betweenness_centrality pro_out_centrality pro_in_centrality centrality_count centrality_sum
GBF6 0.832425 0.190939 0.579288 3 4.602651
AT2G20100 0.734506 0.212513 0.117584 3 4.064603
GATA23 0.653813 0.247033 0.096009 3 3.996855
AT4G29930 0.637058 0.148867 0.221143 3 4.007069
TEM1 0.553198 0.062567 0.210356 3 3.826121
RD26 0.535346 0.102481 0.243797 3 3.881624
AT4G20970 0.485725 0.112190 0.066882 3 3.664797
AT5G61590 0.402557 0.162891 0.093851 3 3.659299
SCL1 0.390224 0.038835 0.080906 3 3.509965
TRFL10 0.357408 0.096009 0.080906 3 3.534323
LBD29 0.283109 0.079827 0.065804 3 3.428740
MYBR1 0.159231 0.080906 0.116505 3 3.356642
NAC089 0.084540 0.069040 0.057174 3 3.210753
BOP2 0.072036 0.020496 0.025890 3 3.118422
NAC083 0.032846 0.081985 0.070119 3 3.184949
HB5 0.002549 0.139159 0.019417 3 3.161125
NAC080 0.002035 0.029126 0.144552 3 3.175714
SCL3 0.001349 0.053937 0.033441 3 3.088728
LAS 0.001272 0.067961 0.030205 3 3.099438
WRKY19 0.001079 0.021575 0.094930 3 3.117584
ERF7 0.001078 0.064725 0.084142 3 3.149945
AT1G77570 0.000853 0.072276 0.037756 3 3.110885
AT4G35270 0.000811 0.033441 0.009709 3 3.043961
SAC51 0.000455 0.064725 0.102481 3 3.167662
AT5G58620 0.000398 0.036677 0.065804 3 3.102880
LRP1 0.000016 0.059331 0.050701 3 3.110049
In [44]:
## Xylem specific (same filter as the atrichoblast cell above)
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[44]:
xyl_betweenness_centrality xyl_out_centrality xyl_in_centrality centrality_count centrality_sum
MYB46 0.980262 0.298677 0.712665 3 4.991604
XND1 0.980019 0.387524 0.485822 3 4.853365
VND7 0.953475 0.519849 0.495274 3 4.968598
AT1G68200 0.921733 0.251418 0.413989 3 4.587139
HB31 0.920036 0.111531 0.310019 3 4.341586
... ... ... ... ... ...
FBH3 0.000823 0.088847 0.058601 3 3.148271
OFP1 0.000494 0.051040 0.086957 3 3.138490
AT3G10760 0.000412 0.069943 0.098299 3 3.168654
AT2G27930 0.000354 0.066163 0.064272 3 3.130789
OBP4 0.000179 0.102079 0.103970 3 3.206228

63 rows × 5 columns

In [45]:
## Phloem specific (same filter as the atrichoblast cell above)
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[45]:
phl_betweenness_centrality phl_out_centrality phl_in_centrality centrality_count centrality_sum
REM22 0.951334 0.139774 0.195272 3 4.286380
NAC2 0.941397 0.130524 0.196300 3 4.268221
APL 0.872514 0.689620 0.767729 3 5.329862
AT3G12730 0.866460 0.236382 0.411100 3 4.513942
NAC086 0.717771 0.054471 0.089414 3 3.861656
... ... ... ... ... ...
AT1G63840 0.000002 0.054471 0.066804 3 3.121277
AT5G12850 0.000001 0.055498 0.045221 3 3.100720
AT1G76870 0.000001 0.040082 0.013361 3 3.053444
SDG20 0.000001 0.010277 0.008222 3 3.018501
RGA1 0.000001 0.070915 0.061665 3 3.132581

81 rows × 5 columns

Search for individual genes¶

In [46]:
gene = 'SHR'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[46]:
tf_occurance atri_degree_centrality atri_out_centrality atri_in_centrality atri_closeness_centrality atri_eigenvector_centrality cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality ... xyl_out_centrality xyl_in_centrality xyl_betweenness_centrality xyl_closeness_centrality xyl_eigenvector_centrality phl_degree_centrality phl_out_centrality phl_in_centrality phl_closeness_centrality phl_eigenvector_centrality
SHR 5.0 0.167139 0.155807 0.011331 0.001709 0.046376 0.162109 0.115234 0.046875 0.680219 ... 0.334594 0.294896 0.438302 0.000752 0.082956 0.113052 0.083248 0.029805 0.00032 0.031782

1 rows × 41 columns

In [47]:
gene = 'BLJ'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[47]:
In [48]:
gene = 'JKD'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[48]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_betweenness_centrality end_closeness_centrality end_eigenvector_centrality col_degree_centrality col_out_centrality col_in_centrality col_betweenness_centrality col_closeness_centrality col_eigenvector_centrality
JKD 3.0 0.230469 0.017578 0.212891 0.176454 0.001429 0.057212 0.382872 0.007557 0.375315 0.147953 0.001485 0.078534 0.171529 0.010502 0.161027 0.738642 0.000553 0.051131
In [49]:
gene = 'RVN'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[49]:
tf_occurance cor_degree_centrality cor_out_centrality cor_in_centrality cor_betweenness_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_closeness_centrality end_eigenvector_centrality
RVN 1.0 0.107422 0.037109 0.070312 0.457857 0.001278 0.033988 0.153652 0.025189 0.128463 0.001301 0.043652
In [50]:
gene = 'BIB'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[50]:
end_degree_centrality end_out_centrality end_in_centrality end_closeness_centrality end_eigenvector_centrality
BIB 0.035264 0.007557 0.027708 0.001034 0.012088
In [51]:
gene = 'IME'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[51]:
In [52]:
gene = 'MYB66'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[52]:
In [53]:
gene = 'GL2'
## Show only the columns where this gene has a non-zero entry
gene_rows = tf_spec[tf_spec.index == gene]
gene_rows[gene_rows.columns[gene_rows.any()]]
Out[53]:
tf_occurance atri_degree_centrality atri_out_centrality atri_in_centrality atri_betweenness_centrality atri_closeness_centrality atri_eigenvector_centrality tri_degree_centrality tri_out_centrality tri_in_centrality ... cor_degree_centrality cor_out_centrality cor_in_centrality cor_closeness_centrality cor_eigenvector_centrality end_degree_centrality end_out_centrality end_in_centrality end_closeness_centrality end_eigenvector_centrality
GL2 2.0 0.495751 0.201133 0.294618 0.728424 0.002126 0.086958 0.724014 0.336918 0.387097 ... 0.087891 0.068359 0.019531 0.001053 0.029496 0.110831 0.093199 0.017632 0.001138 0.033544

1 rows × 28 columns

In [54]:
## Persist the full per-TF centrality table (rows = TF names)
tf_spec.to_csv('TF_GRN_centrality_t0-t1_zscore3.csv', index=True)
In [ ]:
 
In [ ]: